Handling image data
Joaquin Vanschoren, Eindhoven University of Technology
# kernel and image_patch are n x n matrices
# One step of a 2D convolution: elementwise product of the kernel with the
# current image patch, summed into a single output pixel.
pixel_out = np.sum(kernel * image_patch)

# Classic 3x3 convolution kernels used in the demos below.

# Sobel-like filter: responds to horizontal edges (positive weights on the
# top row, negative on the bottom row).
horizontal_edge_kernel = np.array(
    [[1, 2, 1],
     [0, 0, 0],
     [-1, -2, -1]])

# Identity matrix used as a diagonal-edge filter.
diagonal_edge_kernel = np.eye(3, dtype=int)

# Laplacian-style filter: the center weight balances the 8 surrounding
# negative weights, so flat regions map to zero and edges stand out.
edge_detect_kernel = np.full((3, 3), -1)
edge_detect_kernel[1, 1] = 8
Demonstration
mnist_data = oml.datasets.get_dataset(554) # Download MNIST data
# Get the predictors X and the labels y
X_mnist, y_mnist, c, a = mnist_data.get_data(dataset_format='array', target=mnist_data.default_target_attribute);
image = X_mnist[1].reshape((28, 28))  # one flattened digit back to 28x28
image = (image - np.min(image))/np.ptp(image) # Normalize
titles = ('Image and kernel', 'Filtered image')
convolution_demo(image, horizontal_edge_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r');
plt.rcParams['figure.dpi'] = 100
# Apply each of the three kernels to the same digit, one row per kernel
fig, axs = plt.subplots(3, 3)
titles = ('Image and kernel', 'Hor. edge filter', 'Filtered image')
convolution_full(axs[0,:], image, horizontal_edge_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
titles = ('Image and kernel', 'Edge detect filter', 'Filtered image')
convolution_full(axs[1,:], image, edge_detect_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
titles = ('Image and kernel', 'Diag. edge filter', 'Filtered image')
convolution_full(axs[2,:], image, diagonal_edge_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
plt.tight_layout()
House numbers photographed from Google streetview imagery, cropped and centered around digits, but with neighboring numbers or other edge artifacts.
# Download the SVHN (Street View House Numbers) dataset from OpenML
SVHN = oml.datasets.get_dataset(41081)
X, y, cats, attrs = SVHN.get_data(dataset_format='array',
                                  target=SVHN.default_target_attribute)
def plot_images(X, y, grayscale=False):
    """Show the images in X side by side, labeled with their digits.

    Pixel values are assumed to lie in [0, 255] (TODO confirm against the
    SVHN loader); labels in y are stored 0-based and shown as index + 1.
    """
    fig, axes = plt.subplots(1, len(X), figsize=(10, 5))
    for ax, img, label in zip(axes, X, y):
        if grayscale:
            ax.imshow(img.reshape(32, 32) / 255, cmap='gray')
        else:
            ax.imshow(img.reshape(32, 32, 3) / 255)
        ax.set_xlabel(label + 1)  # Label is index+1
        ax.set_xticks(())
        ax.set_yticks(())
    plt.show()
# Show the first 5 color images with their labels
images = range(5)
X_sub_color = [X[i] for i in images]
y_sub = [y[i] for i in images]
plt.rcParams['figure.dpi'] = 60
plot_images(X_sub_color, y_sub)
For recognizing digits, color is not important, so we grayscale the images
def rgb2gray(X, dim=32):
    """Convert a batch of flattened dim x dim RGB images to grayscale.

    Uses the ITU-R BT.601 luma weights; returns shape (len(X), dim*dim, 1).
    """
    luma_weights = [0.2990, 0.5870, 0.1140]
    pixels = X.reshape(len(X), dim * dim, 3)
    gray = pixels @ luma_weights
    return gray[:, :, np.newaxis]
# Grayscale the first 100 images and show the same 5 examples
Xsm = rgb2gray(X[:100])
X_sub = [Xsm[i] for i in images]
plot_images(X_sub, y_sub, grayscale=True)
Demonstration
plt.rcParams['figure.dpi'] = 100
def normalize_image(X):
    """Reshape a flattened 32x32 image and rescale its values to [0, 1]."""
    img = X.reshape((32, 32))
    lo, hi = img.min(), img.max()
    return (img - lo) / (hi - lo)  # min-max normalization
# Interactive demo on one SVHN digit, then all three kernels side by side
image = normalize_image(X_sub[3])
demo2 = convolution_demo(image, horizontal_edge_kernel,
                         vmin=-4, vmax=4, cmap='gray_r');
fig, axs = plt.subplots(3, 3)
titles = ('Image and kernel', 'Hor. edge filter', 'Filtered image')
convolution_full(axs[0,:], image, horizontal_edge_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
titles = ('Image and kernel', 'Diag. edge filter', 'Filtered image')
convolution_full(axs[1,:], image, diagonal_edge_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
titles = ('Image and kernel', 'Edge detect filter', 'Filtered image')
convolution_full(axs[2,:], image, edge_detect_kernel, vmin=-4, vmax=4, titles=titles, cmap='gray_r')
plt.tight_layout()
Demonstration
from scipy import ndimage as ndi
from skimage import data
from skimage.util import img_as_float
from skimage.filters import gabor_kernel
# Gabor Filters.
@interact
def demoGabor(frequency=(0.01,1,0.05), theta=(0,3.14,0.1), sigma=(0,5,0.1)):
    """Interactively plot the real part of a Gabor kernel.

    The tuple defaults become ipywidgets sliders: (min, max, step).
    """
    kernel = gabor_kernel(frequency=frequency, theta=theta,
                          sigma_x=sigma, sigma_y=sigma)
    plt.gray()
    plt.imshow(np.real(kernel), interpolation='nearest')
# Three example Gabor kernels at different frequencies/orientations/widths
plt.subplot(1, 3, 1)
demoGabor(frequency=0.16, theta=1.2, sigma=4.0)
plt.subplot(1, 3, 2)
demoGabor(frequency=0.31, theta=0, sigma=3.6)
plt.subplot(1, 3, 3)
demoGabor(frequency=0.36, theta=1.6, sigma=1.3)
plt.tight_layout()
Demonstration on the streetview data
# Calculate the magnitude of the Gabor filter response given a kernel and an input image
def magnitude(image, kernel):
    """Return the per-pixel magnitude of the Gabor response for `image`.

    The image is standardized (zero mean, unit variance) first; the complex
    kernel's real and imaginary responses are combined into one magnitude.
    """
    normalized = (image - image.mean()) / image.std()
    real_resp = ndi.convolve(normalized, np.real(kernel), mode='wrap')
    imag_resp = ndi.convolve(normalized, np.imag(kernel), mode='wrap')
    return np.hypot(real_resp, imag_resp)
@interact
def demoGabor2(frequency=(0.01,1,0.05), theta=(0,3.14,0.1), sigma=(0,5,0.1)):
    """Show the current `image`, a Gabor kernel, and its response magnitude.

    Reads the module-level `image`; slider defaults are (min, max, step).
    """
    kernel = gabor_kernel(frequency=frequency, theta=theta,
                          sigma_x=sigma, sigma_y=sigma)
    plt.subplot(131)
    plt.title('Original')
    plt.imshow(image)
    plt.subplot(132)
    plt.title('Gabor kernel')
    plt.imshow(np.real(kernel), interpolation='nearest')
    plt.subplot(133)
    plt.title('Response magnitude')
    plt.imshow(np.real(magnitude(image, kernel)), interpolation='nearest')
    plt.tight_layout()
demoGabor2(frequency=0.16, theta=1.4, sigma=1.2)
# More images
image3 = normalize_image(Xsm[3])
image5 = normalize_image(Xsm[5])
image13 = normalize_image(Xsm[13])
# NOTE(review): labels hand-matched to the chosen indices — verify they
# still correspond to the digits 3, 5, 8 if the dataset order changes.
image_names = ('3', '5', '8') # labels
images = (image3, image5, image13)
def plot_filter_bank(images):
    """Apply a small bank of 8 Gabor kernels to each image and plot a grid:
    kernels down the first column, per-image response magnitudes across.

    Reads the module-level `image_names` for the column titles; assumes
    `images` has 3 entries to fill the 4-column grid — TODO confirm.
    """
    # Create a set of kernels, apply them to each image, store the results
    results = []
    kernel_params = []
    for theta in (0, 1):
        theta = theta / 4. * np.pi  # orientations: 0 and 45 degrees
        for frequency in (0.1, 0.2):
            for sigma in (1, 3):
                kernel = gabor_kernel(frequency, theta=theta,sigma_x=sigma,sigma_y=sigma)
                params = 'theta=%.2f,\nfrequency=%.2f\nsigma=%.2f' % (theta, frequency, sigma)
                kernel_params.append(params)
                results.append((kernel, [magnitude(img, kernel) for img in images]))
    # Plotting: 1 row of originals + 8 kernel rows; 1 kernel col + image cols
    fig, axes = plt.subplots(nrows=9, ncols=4, figsize=(6, 12))
    plt.gray()
    #fig.suptitle('Image responses for Gabor filter kernels', fontsize=12)
    axes[0][0].axis('off')  # top-left cell stays empty
    # Plot original images
    for label, img, ax in zip(image_names, images, axes[0][1:]):
        ax.imshow(img)
        ax.set_title(label, fontsize=9)
        ax.axis('off')
    for label, (kernel, magnitudes), ax_row in zip(kernel_params, results, axes[1:]):
        # Plot Gabor kernel
        ax = ax_row[0]
        ax.imshow(np.real(kernel), interpolation='nearest') # Plot kernel
        ax.set_ylabel(label, fontsize=7)
        ax.set_xticks([]) # Remove axis ticks
        ax.set_yticks([])
        # Plot Gabor responses with the contrast normalized for each filter
        vmin = np.min(magnitudes)
        vmax = np.max(magnitudes)
        for patch, ax in zip(magnitudes, ax_row[1:]):
            ax.imshow(patch, vmin=vmin, vmax=vmax) # Plot convolutions
            ax.axis('off')
    plt.rcParams['figure.dpi'] = 80
    plt.show()
plot_filter_bank(images)
plt.rcParams['figure.dpi'] = 100
Another example: Fashion MNIST
plt.rcParams['figure.dpi'] = 200
# build a list of figures for plotting
def buildFigureList(fig, subfiglist, titles, length):
    """Add the first `length` 28x28 images from `subfiglist` to `fig` as a
    single row of subplots, each titled from `titles`."""
    for idx in range(length):
        pixels = np.array(subfiglist[idx], dtype='float').reshape((28, 28))
        ax = fig.add_subplot(1, length, idx + 1)
        plt.imshow(pixels, cmap='gray_r')
        ax.set_title(titles[idx], fontsize=6)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
# Collect the first 10 Fashion-MNIST images and plot them in a single row
subfiglist = []
titles=[]
for i in range(0,10):
    subfiglist.append(X_fm[i])
    titles.append(i)
buildFigureList(plt.figure(1),subfiglist, titles, 10)
plt.show()
Demonstration
plt.rcParams['figure.dpi'] = 100
# Use the first Fashion-MNIST image for the interactive Gabor demo;
# demoGabor2 below reads the module-level `image`.
boot = X_fm[0].reshape((28, 28))
image=boot
# NOTE: redefines the earlier demoGabor2 (this version omits tight_layout).
@interact
def demoGabor2(frequency=(0.01,1,0.05), theta=(0,3.14,0.1), sigma=(0,5,0.1)):
    """Show the current `image`, a Gabor kernel, and its response magnitude.

    Slider defaults are (min, max, step) tuples for ipywidgets.
    """
    plt.subplot(131)
    plt.title('Original')
    plt.imshow(image)
    plt.subplot(132)
    plt.title('Gabor kernel')
    plt.imshow(np.real(gabor_kernel(frequency=frequency, theta=theta, sigma_x=sigma, sigma_y=sigma)), interpolation='nearest')
    plt.subplot(133)
    plt.title('Response magnitude')
    plt.imshow(np.real(magnitude(image, gabor_kernel(frequency=frequency, theta=theta, sigma_x=sigma, sigma_y=sigma))), interpolation='nearest')
demoGabor2(frequency=0.81, theta=2.7, sigma=0.9)
Fashion MNIST with multiple filters (filter bank)
# Fetch some Fashion-MNIST images
boot = X_fm[0].reshape(28, 28)
shirt = X_fm[1].reshape(28, 28)
dress = X_fm[2].reshape(28, 28)
image_names = ('boot', 'shirt', 'dress')
images = (boot, shirt, dress)
plot_filter_bank(images)
plt.rcParams['figure.dpi'] = 100





In pure convnets, one input value spreads to 3x3 nodes of the first layer, 5x5 nodes of the second, etc. Example with Keras:
- Conv2D for 2D convolutional layers
- MaxPooling2D for max-pooling
model = models.Sequential()
# Stack of Conv2D + MaxPooling2D layers: each conv uses 3x3 filters,
# each pooling step halves the spatial resolution.
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(28, 28, 1)))  # 28x28 grayscale input
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
Observe how the input image on 28x28x1 is transformed to a 3x3x64 feature map
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 26, 26, 32) 320 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 13, 13, 32) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 11, 11, 64) 18496 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 3, 3, 64) 36928 ================================================================= Total params: 55,744 Trainable params: 55,744 Non-trainable params: 0 _________________________________________________________________
Completing the network
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
Complete network
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 26, 26, 32) 320 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 13, 13, 32) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 11, 11, 64) 18496 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 3, 3, 64) 36928 _________________________________________________________________ flatten (Flatten) (None, 576) 0 _________________________________________________________________ dense (Dense) (None, 64) 36928 _________________________________________________________________ dense_1 (Dense) (None, 10) 650 ================================================================= Total params: 93,322 Trainable params: 93,322 Non-trainable params: 0 _________________________________________________________________
Run the model on MNIST dataset
from tensorflow.keras.models import load_model
model = load_model(os.path.join(model_dir, 'cats_and_dogs_small_0.h5'))
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Accuracy: ", test_acc)
history = pickle.load(open("../data/models/cats_and_dogs_small_0_history.p", "rb"))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
10000/10000 [==============================] - 3s 266us/sample - loss: 0.0355 - accuracy: 0.9902 Accuracy: 0.9902
Tip:
model.save(os.path.join(model_dir, 'cats_and_dogs_small.h5'))
with open(os.path.join(model_dir, 'cats_and_dogs_small_history.p'), 'wb') as file_pi:
pickle.dump(history.history, file_pi)
ImageDataGenerator: allows one to encode, resize, and rescale JPEG images
train_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
train_dir, # Directory with images
target_size=(150, 150), # Resize images
batch_size=20, # Return 20 images at a time
class_mode='binary') # Binary labels
# Pull one batch from the generator and show the first 7 images with their
# binary labels; break immediately because the generator loops forever.
for data_batch, labels_batch in train_generator:
    plt.figure(figsize=(10,5))
    for i in range(7):
        plt.subplot(171+i)
        plt.xticks([])
        plt.yticks([])
        imgplot = plt.imshow(data_batch[i])
        plt.title('cat' if labels_batch[i] == 0 else 'dog')
    plt.tight_layout()
    break
Since the images are larger and more complex, we add another convolutional layer and increase the number of filters to 128.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_3 (Conv2D) (None, 148, 148, 32) 896 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 74, 74, 32) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 72, 72, 64) 18496 _________________________________________________________________ max_pooling2d_3 (MaxPooling2 (None, 36, 36, 64) 0 _________________________________________________________________ conv2d_5 (Conv2D) (None, 34, 34, 128) 73856 _________________________________________________________________ max_pooling2d_4 (MaxPooling2 (None, 17, 17, 128) 0 _________________________________________________________________ conv2d_6 (Conv2D) (None, 15, 15, 128) 147584 _________________________________________________________________ max_pooling2d_5 (MaxPooling2 (None, 7, 7, 128) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 6272) 0 _________________________________________________________________ dense_2 (Dense) (None, 512) 3211776 _________________________________________________________________ dense_3 (Dense) (None, 1) 513 ================================================================= Total params: 3,453,121 Trainable params: 3,453,121 Non-trainable params: 0 _________________________________________________________________
The fit function also supports generators:
model.compile(loss='binary_crossentropy',
optimizer=optimizers.RMSprop(lr=1e-4),
metrics=['acc'])
history = model.fit(
train_generator, steps_per_epoch=100,
epochs=30, verbose=0,
validation_data=validation_generator,
validation_steps=50)
import pickle
history = pickle.load(open("../data/models/cats_and_dogs_small_1_history.p", "rb"))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
datagen = ImageDataGenerator(
rotation_range=40, # Rotate image up to 40 degrees
width_shift_range=0.2, # Shift image left-right up to 20% of image width
height_shift_range=0.2,# Shift image up-down up to 20% of image height
shear_range=0.2, # Shear (slant) the image up to 0.2 degrees
zoom_range=0.2, # Zoom in up to 20%
horizontal_flip=True, # Horizontally flip the image
fill_mode='nearest')
Example
# This is module with image preprocessing utilities
from tensorflow.keras.preprocessing import image
plt.rcParams['figure.dpi'] = 120
train_cats_dir = os.path.join(base_dir, 'train', 'cats')
fnames = [os.path.join(train_cats_dir, fname) for fname in os.listdir(train_cats_dir)]
# We pick one image to "augment"
img_path = fnames[5]
# Read the image and resize it
img = image.load_img(img_path, target_size=(150, 150))
# Convert it to a Numpy array with shape (150, 150, 3)
x = image.img_to_array(img)
# Reshape it to (1, 150, 150, 3)
x = x.reshape((1,) + x.shape)
# The .flow() command below generates batches of randomly transformed images.
# It will loop indefinitely, so we need to `break` the loop at some point!
for a in range(2):  # draw two sets of 4 augmented samples
    i = 0
    for batch in datagen.flow(x, batch_size=1):
        # NOTE(review): subplot positions 141..144 repeat across the outer
        # loop, so the second set overwrites the first — confirm intended.
        plt.subplot(141+i)
        plt.xticks([])
        plt.yticks([])
        imgplot = plt.imshow(image.array_to_img(batch[0]))
        i += 1
        if i % 4 == 0:
            break
    plt.tight_layout()
plt.show()
We also add Dropout before the Dense layer
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
(Almost) no more overfitting!
history = pickle.load(open("../data/models/cats_and_dogs_small_2_history.p", "rb"))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
from tensorflow.keras.models import load_model
model = load_model(os.path.join(model_dir, 'cats_and_dogs_small_2.h5'))
model.summary() # As a reminder.
Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_8 (Conv2D) (None, 148, 148, 32) 896 _________________________________________________________________ max_pooling2d_8 (MaxPooling2 (None, 74, 74, 32) 0 _________________________________________________________________ conv2d_9 (Conv2D) (None, 72, 72, 64) 18496 _________________________________________________________________ max_pooling2d_9 (MaxPooling2 (None, 36, 36, 64) 0 _________________________________________________________________ conv2d_10 (Conv2D) (None, 34, 34, 128) 73856 _________________________________________________________________ max_pooling2d_10 (MaxPooling (None, 17, 17, 128) 0 _________________________________________________________________ conv2d_11 (Conv2D) (None, 15, 15, 128) 147584 _________________________________________________________________ max_pooling2d_11 (MaxPooling (None, 7, 7, 128) 0 _________________________________________________________________ flatten_2 (Flatten) (None, 6272) 0 _________________________________________________________________ dropout_2 (Dropout) (None, 6272) 0 _________________________________________________________________ dense_4 (Dense) (None, 512) 3211776 _________________________________________________________________ dense_5 (Dense) (None, 1) 513 ================================================================= Total params: 3,453,121 Trainable params: 3,453,121 Non-trainable params: 0 _________________________________________________________________
layer_outputs = [layer.output for layer in model.layers[:8]]
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img_tensor)
Output of the first Conv2D layer, 3rd channel (filter):
plt.rcParams['figure.dpi'] = 120
first_layer_activation = activations[0]
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.imshow(img_tensor[0])
ax2.matshow(first_layer_activation[0, :, :, 2], cmap='viridis')
ax1.set_xticks([])
ax1.set_yticks([])
ax2.set_xticks([])
ax2.set_yticks([])
ax1.set_xlabel('Input image')
ax2.set_xlabel('Activation of filter 2');
Output of filter 16:
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.imshow(img_tensor[0])
ax2.matshow(first_layer_activation[0, :, :,16], cmap='viridis')
ax1.set_xticks([])
ax1.set_yticks([])
ax2.set_xticks([])
ax2.set_yticks([])
ax1.set_xlabel('Input image')
ax2.set_xlabel('Activation of filter 16');
The same filter responds quite differently for other inputs
# Load a different test image to compare filter responses across inputs
img_path = os.path.join(base_dir, 'test/dogs/dog.1528.jpg')
# We preprocess the image into a 4D tensor
img = image.load_img(img_path, target_size=(150, 150))
img_tensor2 = image.img_to_array(img)
img_tensor2 = np.expand_dims(img_tensor2, axis=0)  # add batch dim: (1, 150, 150, 3)
# Remember that the model was trained on inputs
# that were preprocessed in the following way:
img_tensor2 /= 255.
activations2 = activation_model.predict(img_tensor2)
plt.rcParams['figure.dpi'] = 120
first_layer_activation2 = activations2[0]
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.imshow(img_tensor2[0])
ax2.matshow(first_layer_activation2[0, :, :, 16], cmap='viridis')
ax1.set_xticks([])
ax1.set_yticks([])
ax2.set_xticks([])
ax2.set_yticks([])
ax1.set_xlabel('Input image')
ax2.set_xlabel('Activation of filter 16');
plot_activations(0, activations)
plot_activations(2, activations)
plot_activations(4, activations)
plot_activations(6, activations)
plot_activations(6, activations2)

# NOTE(review): illustrative TF1-style slide snippets for gradient ascent on
# a filter activation — not runnable in this order (`size`, `layer_output`,
# `filter_index`, `input_img_data`, `step` come from the full version below).
from keras import backend as K
input_img = np.random.random((1, size, size, 3)) * 20 + 128.  # gray image + noise
loss = K.mean(layer_output[:, :, :, filter_index])  # mean activation of one filter
grads = K.gradients(loss, model.input)[0] # Compute gradient
for i in range(40): # Run gradient ascent for 40 steps
    loss_v, grads_v = K.function([input_img], [loss, grads])
    input_img_data += grads_v * step
tf.compat.v1.disable_eager_execution()  # K.gradients requires graph mode
# Convert tensor to image
# Convert tensor to image
def deprocess_image(x):
    """Convert a float image tensor into a displayable uint8 RGB array.

    Centers the values, rescales them to std 0.1 around 0.5, clips to
    [0, 1], then maps to [0, 255] as uint8.

    FIX: the original mutated the caller's array in place (`x -= ...`);
    we now work on a float copy so the input is left untouched.
    """
    x = np.asarray(x, dtype=float).copy()
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB array
    x *= 255
    x = np.clip(x, 0, 255).astype('uint8')
    return x
def generate_pattern(layer_name, filter_index, size=150):
    """Gradient-ascent visualization: synthesize a size x size RGB input
    that maximizes the mean activation of one filter in `layer_name`.

    Uses TF1-style symbolic gradients on the module-level `model`
    (requires eager execution to be disabled).
    """
    # Build a loss function that maximizes the activation
    # of the nth filter of the layer considered.
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])
    # Compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, model.input)[0]
    # Normalization trick: we normalize the gradient
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
    # This function returns the loss and grads given the input picture
    iterate = K.function([model.input], [loss, grads])
    # We start from a gray image with some noise
    input_img_data = np.random.random((1, size, size, 3)) * 20 + 128.
    # Run gradient ascent for 40 steps
    step = 1.
    for i in range(40):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step
    img = input_img_data[0]
    return deprocess_image(img)
def visualize_filter(layer_name):
    """Render an 8x8 grid of gradient-ascent patterns, one per filter, for
    the first 64 filters of `layer_name`, separated by black margins."""
    size = 64
    margin = 5
    # This a empty (black) image where we will store our results.
    results = np.zeros((8 * size + 7 * margin, 8 * size + 7 * margin, 3))
    for i in range(8): # iterate over the rows of our results grid
        for j in range(8): # iterate over the columns of our results grid
            # Generate the pattern for filter `i + (j * 8)` in `layer_name`
            filter_img = generate_pattern(layer_name, i + (j * 8), size=size)
            # Put the result in the square `(i, j)` of the results grid
            horizontal_start = i * size + i * margin
            horizontal_end = horizontal_start + size
            vertical_start = j * size + j * margin
            vertical_end = vertical_start + size
            results[horizontal_start: horizontal_end, vertical_start: vertical_end, :] = filter_img
    # Display the results grid
    plt.figure(figsize=(10, 10))
    # FIX: generate_pattern() already returns uint8 values in [0, 255];
    # the previous `(results * 255).astype(np.uint8)` scaled a second time,
    # overflowing the uint8 cast and garbling the display. Cast directly.
    plt.imshow(results.astype(np.uint8))
    plt.show()
from tensorflow.keras import backend as K
plt.rcParams['figure.dpi'] = 60
visualize_filter('conv2d_9')
visualize_filter('conv2d_11')
Let's do this again for the VGG16 network pretrained on ImageNet (much larger)
model = VGG16(weights='imagenet', include_top=False)
# VGG16 model
model.summary()
Model: "vgg16" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, None, None, 3)] 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, None, None, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, None, None, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, None, None, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, None, None, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, None, None, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, None, None, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, None, None, 256) 295168 _________________________________________________________________ block3_conv2 (Conv2D) (None, None, None, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, None, None, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, None, None, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, None, None, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, None, None, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, None, None, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, None, None, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, None, None, 
512) 2359808 _________________________________________________________________ block5_conv2 (Conv2D) (None, None, None, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, None, None, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, None, None, 512) 0 ================================================================= Total params: 14,714,688 Trainable params: 14,714,688 Non-trainable params: 0 _________________________________________________________________
for i in range(3):
plt.subplot(131+i)
plt.xticks([])
plt.yticks([])
plt.imshow(generate_pattern('block3_conv1', i))
plt.tight_layout()
plt.show();
First 64 filters for 1st convolutional layer in block 1: simple edges and colors
plt.rcParams['figure.dpi'] = 60
visualize_filter('block1_conv1')
Filters in 2nd block of convolution layers: simple textures (combined edges and colors)
visualize_filter('block2_conv1')
Filters in 3rd block of convolution layers: more natural textures
visualize_filter('block3_conv1')
Filters in 4th block of convolution layers: feathers, eyes, leaves,...
visualize_filter('block4_conv1')

Illustration (cats vs dogs)
plot_activations(6, activations2)
More realistic example:
model = VGG16(weights='imagenet')

Preprocessing
# Load one image, apply the VGG16-specific preprocessing, and classify it.
from keras.applications.vgg16 import preprocess_input
img_path = '../images/10_elephants.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0) # Transform to batch of size (1, 224, 224, 3)
x = preprocess_input(x)  # channel reordering + mean-centering expected by VGG16
# FIX: the prediction was redundantly computed twice; one forward pass suffices.
preds = model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])
Predicted: [('n02504458', 'African_elephant', 0.909421), ('n01871265', 'tusker', 0.086182885), ('n02504013', 'Indian_elephant', 0.0043545826)]
Visualize the class activation map
heatmap = np.maximum(heatmap, 0)
heatmap /= np.max(heatmap)
plt.matshow(heatmap)
plt.show()
Superimposed on the original image
img = cv2.imread('../images/elephant_cam.jpg')
RGB_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.rcParams['figure.dpi'] = 120
plt.imshow(RGB_im)
plt.title('Class activation map')
plt.xticks([])
plt.yticks([])
plt.show()

conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
conv_base.summary()
Model: "vgg16" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_2 (InputLayer) [(None, 150, 150, 3)] 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, 150, 150, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, 150, 150, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, 75, 75, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, 75, 75, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, 75, 75, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, 37, 37, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, 37, 37, 256) 295168 _________________________________________________________________ block3_conv2 (Conv2D) (None, 37, 37, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, 37, 37, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, 18, 18, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, 18, 18, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, 18, 18, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, 18, 18, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, 9, 9, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, 9, 9, 512) 2359808 
_________________________________________________________________ block5_conv2 (Conv2D) (None, 9, 9, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, 9, 9, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, 4, 4, 512) 0 ================================================================= Total params: 14,714,688 Trainable params: 14,714,688 Non-trainable params: 0 _________________________________________________________________
predict from the convolutional base to build new features
# Run images through the frozen convolutional base to produce 4x4x512
# feature maps, then train a small dense classifier on those features.
generator = datagen.flow_from_directory(dir, target_size=(150, 150),
                                        batch_size=batch_size, class_mode='binary')
# NOTE(review): slide excerpt — the full version collects features_batch
# into an array and breaks; as written this loop never terminates.
for inputs_batch, labels_batch in generator:
    features_batch = conv_base.predict(inputs_batch)
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=4 * 4 * 512))
model.add(layers.Dropout(0.5))  # regularize the small dense classifier
model.add(layers.Dense(1, activation='sigmoid'))
history = pickle.load(open("../data/models/cats_and_dogs_small_3a_history.p", "rb"))
print("Max val_acc",np.max(history['val_acc']))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
Max val_acc 0.9070000052452087
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
conv_base.trainable = False
model.summary()
Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= vgg16 (Model) (None, 4, 4, 512) 14714688 _________________________________________________________________ flatten_3 (Flatten) (None, 8192) 0 _________________________________________________________________ dense_6 (Dense) (None, 256) 2097408 _________________________________________________________________ dense_7 (Dense) (None, 1) 257 ================================================================= Total params: 16,812,353 Trainable params: 2,097,665 Non-trainable params: 14,714,688 _________________________________________________________________
Data augmentation and training (takes a LONG time)
train_datagen = ImageDataGenerator(
rescale=1./255, rotation_range=40, width_shift_range=0.2,
height_shift_range=0.2, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode='nearest')
train_generator = train_datagen.flow_from_directory(dir,
target_size=(150, 150), batch_size=20, class_mode='binary')
history = model.fit_generator(
train_generator, steps_per_epoch=100, epochs=30,
validation_data=validation_generator, validation_steps=50)
We now get about 90% accuracy again, and very little overfitting
history = pickle.load(open("../data/models/cats_and_dogs_small_3b_history.p", "rb"))
print("Max val_acc",np.max(history['val_acc']))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
Max val_acc 0.906
# Fine-tuning: unfreeze only block5_conv1 in the convolutional base;
# every other layer stays frozen.
for layer in conv_base.layers:
    if layer.name == 'block5_conv1':
        layer.trainable = True
    else:
        layer.trainable = False
Visualized

# Reload the feature-extraction model and recompile with a very low
# learning rate so fine-tuning makes only small weight updates.
model = load_model(os.path.join(model_dir, 'cats_and_dogs_small_3b.h5'))
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),  # `lr` is the legacy alias of `learning_rate`
              metrics=['acc'])
history = model.fit(
    train_generator, steps_per_epoch=100, epochs=100,
    validation_data=validation_generator,
    validation_steps=50)
Almost 95% accuracy. The curves are quite noisy, though.
history = pickle.load(open("../data/models/cats_and_dogs_small_3c_history.p", "rb"))
print("Max val_acc",np.max(history['val_acc']))
pd.DataFrame(history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
Max val_acc 0.948
def smooth_curve(points, factor=0.8):
    """Exponential moving average of `points` (higher factor = smoother).

    The first point is kept as-is; each later value blends the previous
    smoothed value with the incoming point.
    """
    smoothed = []
    for value in points:
        if not smoothed:
            smoothed.append(value)
        else:
            smoothed.append(smoothed[-1] * factor + value * (1 - factor))
    return smoothed
smooth_history = {}
smooth_history['loss'] = smooth_curve(history['loss'])
smooth_history['acc'] = smooth_curve(history['acc'])
smooth_history['val_loss'] = smooth_curve(history['val_loss'])
smooth_history['val_acc'] = smooth_curve(history['val_acc'])
print("Max val_acc",np.max(smooth_history['val_acc']))
pd.DataFrame(smooth_history).plot(lw=2,style=['b:','r:','b-','r-']);
plt.xlabel('epochs');
Max val_acc 0.9408402440315202
Finally, evaluate the trained model on the test set. This is consistent with the validation results.
test_generator = test_datagen.flow_from_directory(
test_dir,
target_size=(150, 150),
batch_size=20,
class_mode='binary')
model = load_model(os.path.join(model_dir, 'cats_and_dogs_small_3c.h5'))
test_loss, test_acc = model.evaluate(test_generator, steps=50)
print('test acc:', test_acc)
Found 1000 images belonging to 2 classes. 50/50 [==============================] - 126s 3s/step - loss: 0.3771 - acc: 0.9390 test acc: 0.939